/*************************************************************************
 * The contents of this file are subject to the MYRICOM MYRINET          *
 * EXPRESS (MX) NETWORKING SOFTWARE AND DOCUMENTATION LICENSE (the       *
 * "License"); User may not use this file except in compliance with the  *
 * License.  The full text of the License can be found in LICENSE.TXT    *
 *                                                                       *
 * Software distributed under the License is distributed on an "AS IS"   *
 * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied.  See  *
 * the License for the specific language governing rights and            *
 * limitations under the License.                                        *
 *                                                                       *
 * Copyright 2005 by Myricom, Inc.  All rights reserved.                 *
 *************************************************************************/

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <assert.h>
#include <unistd.h>
#include <time.h>

#include "mx_timing.h"
#include "mxsmpi_wrap.h"


#define WARMUP 32

/*
 * qsort()-compatible comparator for doubles, ascending order.
 *
 * ap, bp: pointers to the two double values being compared.
 * Returns 1 when *ap is greater than *bp, 0 when they compare equal,
 * and -1 in every other case.
 */
static int
double_cmp(const void *ap, const void *bp)
{
  const double lhs = *(const double *)ap;
  const double rhs = *(const double *)bp;

  if (lhs > rhs)
    return 1;
  if (lhs == rhs)
    return 0;
  return -1;
}

/*
 * MPI ping-pong benchmark between exactly two ranks.
 *
 * Command line: [length [iter]]
 *   length  message size in bytes, must be >= 0 (default 0)
 *   iter    number of timed round trips, must be >= 1 (default 10)
 *
 * WARMUP additional round trips are executed first and excluded from the
 * statistics.  Rank 0 records a cycle-counter timestamp at the start of
 * every round trip (send followed by receive) while rank 1 echoes each
 * message back.  Rank 0 then prints the median, average, worst and best
 * half-round-trip time in microseconds and the corresponding bandwidth
 * in MB/s.
 *
 * Returns 0 on success and 1 on invalid arguments, on failure to allocate
 * the timing arrays, or when the job was not started with two processes.
 */
int main(int argc, char **argv)
{
  char *sbuf, *rbuf;
  int i;
  int nprocs, myrank;
  int total;
  size_t buf_bytes;
  MPI_Status status;
  mx_cycles_t *timings;
  double *dtimes;

  int length = argc > 1 ? atoi(argv[1]) : 0;
  int iter = argc > 2 ? atoi(argv[2]) : 10;

  /* iter is used as a divisor and as an array bound below, and length is
     handed to malloc and MPI, so reject values that would break those. */
  if (length < 0 || iter < 1) {
    fprintf(stderr, "usage: %s [length >= 0] [iterations >= 1]\n",
            argc > 0 ? argv[0] : "mpi_pingpong");
    return 1;
  }

  total = iter + WARMUP;

  dtimes = calloc((size_t)iter, sizeof(dtimes[0]));
  /* One extra slot holds the timestamp taken after the last round trip. */
  timings = calloc((size_t)total + 1, sizeof(timings[0]));
  if (!dtimes || !timings) {
    fprintf(stderr, "failed to allocate timing arrays\n");
    free(dtimes);
    free(timings);
    return 1;
  }

  mx_cycles_counter_init();
  MPI_Init(&argc,&argv);
  MPI_Comm_rank(MPI_COMM_WORLD,&myrank);
  MPI_Comm_size(MPI_COMM_WORLD,&nprocs);
  if (nprocs != 2) {
    fprintf(stderr,"mpi_pingpong requires two nodes(I am %d out of %d)\n",
	    myrank, nprocs);
    /* Every rank takes this branch, so shut MPI down before leaving. */
    free(dtimes);
    free(timings);
    MPI_Finalize();
    return 1;
  } else {
    fprintf(stderr,"process %d out of %d started\n", myrank, nprocs);
  }

  /* malloc(0) may return NULL, which would be mistaken for a failure when
     length is 0 (the default); always request at least one byte. */
  buf_bytes = length > 0 ? (size_t)length : 1;
  sbuf = malloc(buf_bytes);
  rbuf = malloc(buf_bytes);
  if (!sbuf || !rbuf) {
    /* Same outcome as the former assert(), but still active with NDEBUG. */
    fprintf(stderr, "failed to allocate %d-byte message buffers\n", length);
    abort();
  }
  /* Give the outgoing message defined contents. */
  memset(sbuf, 0, buf_bytes);

  if (myrank == 0) {
    for (i=0;i<total;i++) {
      timings[i] = mx_get_cycles();
      MPI_Send(sbuf, length, MPI_BYTE, 1, 10, MPI_COMM_WORLD);
      MPI_Recv(rbuf, length, MPI_BYTE, 1, 10, MPI_COMM_WORLD, &status);
    }
    timings[i] = mx_get_cycles();
  } else {
    for (i=0;i<total;i++) {
      MPI_Recv(rbuf, length, MPI_BYTE, 0, 10, MPI_COMM_WORLD, &status);
      MPI_Send(sbuf, length, MPI_BYTE, 0, 10, MPI_COMM_WORLD);
    }
  }

  if (myrank == 0) {
    /* Skip the warm-up samples; timed[0..iter] are the measured stamps. */
    const mx_cycles_t *timed = timings + WARMUP;
    /* Bytes moved per round trip, computed in double so that large
       message sizes cannot overflow int arithmetic. */
    const double rt_bytes = 2.0 * length;
    double avg;

    for (i=0;i<iter;i++) {
      dtimes[i] = (timed[i+1] - timed[i])*mx_seconds_per_cycle();
    }
    avg = (timed[iter] - timed[0]) *mx_seconds_per_cycle() / iter;

    /* Ascending sort: dtimes[0] is the fastest round trip, dtimes[iter-1]
       the slowest, dtimes[iter/2] the median. */
    qsort(dtimes, iter, sizeof(dtimes[0]), double_cmp);
    /* Round-trip seconds -> half-round-trip microseconds. */
    printf("median,\tavg,\tworst,\tbest\n %.3f us\t%.3fus\t%.3fus\t%.3fus\n", 
	   dtimes[iter/2]*1e6/2,avg*1e6/2, dtimes[iter-1]*1e6/2, dtimes[0]*1e6/2);
    printf("median,\tavg,\tworst,\tmax\n %gMB/s\t%gMB/s\t%gMB/s\t%gMB/s\n", 
	   rt_bytes/dtimes[iter/2]/1e6,rt_bytes/avg/1e6,rt_bytes/dtimes[iter-1]/1e6,rt_bytes/dtimes[0]/1e6);
  }

  free(sbuf);
  free(rbuf);
  free(dtimes);
  free(timings);

  MPI_Finalize();
  
  return 0;
}
